# Replication materials for 
# Peisker, J. (2023)
# Context matters: The drivers of environmental concern in European regions

library(BMS)
citation("BMS")
library(xtable)
citation("xtable")
library(scales)
citation("scales")
library(tidyverse)
citation("tidyverse")
library(cowplot)
citation("cowplot")

print(sessionInfo(), locale = FALSE)
# R version 4.2.2 (2022-10-31)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Arch Linux
# 
# Matrix products: default
# BLAS:   /usr/lib/libblas.so.3.11.0
# LAPACK: /usr/lib/liblapack.so.3.11.0
# 
# attached base packages:
#  [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#  [1] cowplot_1.1.1   forcats_0.5.1   stringr_1.4.0   dplyr_1.0.10    purrr_0.3.4     readr_1.4.0     tidyr_1.1.3    
#  [8] tibble_3.1.2    ggplot2_3.3.5   tidyverse_1.3.1 scales_1.1.1    xtable_1.8-4    BMS_0.3.4      
# 
# loaded via a namespace (and not attached):
#  [1] Rcpp_1.0.9       cellranger_1.1.0 pillar_1.6.1     compiler_4.2.2   dbplyr_2.1.1     tools_4.2.2      jsonlite_1.7.2  
#  [8] lubridate_1.7.10 lifecycle_1.0.3  gtable_0.3.0     pkgconfig_2.0.3  rlang_1.0.6      reprex_2.0.0     DBI_1.1.1       
# [15] cli_3.4.1        rstudioapi_0.13  haven_2.4.1      xml2_1.3.2       withr_2.5.0      httr_1.4.2       fs_1.5.2        
# [22] generics_0.1.0   vctrs_0.5.0      hms_1.1.0        grid_4.2.2       tidyselect_1.1.1 glue_1.4.2       R6_2.5.0        
# [29] fansi_0.5.0      readxl_1.3.1     modelr_0.1.8     magrittr_2.0.3   backports_1.2.1  ellipsis_0.3.2   rvest_1.0.0     
# [36] assertthat_0.2.1 colorspace_2.0-1 utf8_1.2.1       stringi_1.7.8    munsell_0.5.0    broom_1.0.1      crayon_1.4.1    

# set working directory
setwd("")
theme_set(theme_bw())

# load model matrix and variable labels
load("data.RData")

# set cutoff value for transformed coefficients
msd_cutoff <- 1.6

# run Bayesian model averaging
yrs <- as.character(2000:2019)
ind_vars <- names(mm_sq)[!(names(mm_sq) %in% c("env_concern"))]
ind_vars
bma_ind_sq <- bms(
  X.data = mm_sq,
  mcmc = "bd.int", #birth-death sampler
  burn = 1e6,
  iter = 2e6,
  g = "BRIC", #g = max(N,K²) = N
  mprior = "random",
  mprior.size = length(ind_vars) / 2
)
bma_ind_sq
plot(bma_ind_sq)

# PMP of top 100 models
sum(topmodels.bma(bma_ind_sq)["PMP (Exact)",1:100])
# PMP of top 500 models
sum(topmodels.bma(bma_ind_sq)["PMP (Exact)",1:500])

out_ind_sq <- 
  coef(bma_ind_sq, exact = FALSE, std.coefs = FALSE) %>% 
  as_tibble(rownames = "var") %>% 
  transmute(
    var, PIP, 
    `Post. mean` = `Post Mean`, 
    `Post. SD` = `Post SD`, 
    `Mean/SD` = ifelse(`Post SD` < 0.00001, "", abs(`Post. mean`) / `Post. SD`)
  )   %>% 
  arrange(var)

#### make Table B.4 ####
tab_ind <- lapply(seq_along(ind_names$var), function(i) {
  var <- ind_names$var[i]
  coef_l <- out_ind_sq$`Post. mean`[out_ind_sq$var == var]
  pip_l <- out_ind_sq$PIP[out_ind_sq$var == var]
  m_sd_l <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var]
  var2 <- paste0(var, "#", var)
  
  # quadratic term included in model space
  if (var2 %in% out_ind_sq$var){
    pip_sq <- out_ind_sq$PIP[out_ind_sq$var == var2]
    m_sd_sq <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var2]
    
    # linear term not robust
    if (pip_l <= 0.5 | m_sd_l <= msd_cutoff) {
      return(
        out_ind_sq[out_ind_sq$var %in% c(var, var2),] %>% mutate(robust = FALSE)
      )
    }
    # linear term robust 
    if (pip_l > 0.5 & m_sd_l > msd_cutoff) {
      if (pip_sq > 0.5 & m_sd_sq > msd_cutoff) {
        return(
          out_ind_sq[out_ind_sq$var %in% c(var, var2),] %>% mutate(robust = TRUE)
        )
      } else {
        return(
          out_ind_sq[out_ind_sq$var %in% c(var),] %>% 
            bind_rows(out_ind_sq[out_ind_sq$var %in% c(var2),]) %>% 
            mutate(robust = c(TRUE, FALSE))
        )
      }
    }
  } 
})

# clean up variable names, highlight robust terms
tab_ind2 <- 
  tab_ind %>%
  bind_rows() %>% 
  filter(!grepl("20", var)) %>% 
  mutate(
    var2 = var,
    var = str_replace(var, "(#.*)", "")
  ) %>% 
  left_join(ind_names, by = "var") %>% 
  mutate(
    Variable = if_else(grepl("#", var2), paste0(Variable, "²"), Variable)
  ) %>% 
  arrange(Variable) %>% 
  mutate(
    Variable = if_else(robust, paste0("\\textbf{", Variable, "}"), Variable)
  ) %>% 
  select(Variable, PIP, `Post. mean`, `Post. SD`, `Mean/SD`)
tab_ind2 %>% print(n = 100)

print.xtable(
  xtable(tab_ind2, digits = 3), 
  file = "Tables/bma_ind_sq.tex",
  include.rownames = FALSE,
  math.style.negative = TRUE,
  floating = FALSE,
  booktabs = TRUE,
  sanitize.text.function	= identity
)

#### plots of non-linear relationships ####
#### economy ####
plot_vars_econ <- ind_names$var[ind_names$Category %in% c("Economy")] 
names(plot_vars_econ) <- ind_names$Variable[ind_names$Category %in% c("Economy")]

plot_list_econ <- lapply(seq_along(plot_vars_econ), function(i) {
  var <- plot_vars_econ[i]
  coef_l <- out_ind_sq$`Post. mean`[out_ind_sq$var == var]
  pip_l <- out_ind_sq$PIP[out_ind_sq$var == var]
  m_sd_l <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var]
  var2 <- paste0(var, "#", var)
  coef_sq <- 0
  
  # quadratic term included
  if (var2 %in% out_ind_sq$var){
    pip_sq <- out_ind_sq$PIP[out_ind_sq$var == var2]
    m_sd_sq <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var2]
    
    # covariate not robust
    if (
      (pip_l <= 0.5 | m_sd_l <= msd_cutoff)
    ) {
      return()
    }
    # quadratic term robust if
    if (
      (pip_sq > 0.5 & m_sd_sq > msd_cutoff)
    ) {
      coef_sq <- out_ind_sq$`Post. mean`[out_ind_sq$var == var2]
    }
  }  else {
    # only linear term included
    # linear term not robust
    if (pip_l <= 0.5 | m_sd_l <= msd_cutoff) {
      return()
    }
  }
  
  sq_function <- function(x){
    coef_l*x + coef_sq*x^2
  }
  return(
    data.frame(x = seq(-2, 2, 0.05)) %>% 
      ggplot(aes(x)) +
      geom_hline(yintercept = 0, color = "grey70") +
      geom_vline(xintercept = 0, color = "grey70") +
      stat_function(fun = sq_function) + 
      scale_y_continuous(limits = c(-10, 10), labels = label_percent(scale = 1)) +
      labs(x = names(plot_vars_econ)[i], y = "p")
  )
})

plot_list_econ2 <- plot_list_econ[!(sapply(plot_list_econ, is.null))]
sq_plot_econ <- plot_grid(plotlist = plot_list_econ2, labels = "auto", ncol = 4)
sq_plot_econ

#### individual and population characteristics ####
plot_vars_pop <- ind_names$var[ind_names$Category %in% c("Population", "Individual")] 
names(plot_vars_pop) <- ind_names$Variable[ind_names$Category %in% c("Population", "Individual")]

plot_list_pop <- lapply(seq_along(plot_vars_pop), function(i) {
  var <- plot_vars_pop[i]
  coef_l <- out_ind_sq$`Post. mean`[out_ind_sq$var == var]
  pip_l <- out_ind_sq$PIP[out_ind_sq$var == var]
  m_sd_l <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var]
  var2 <- paste0(var, "#", var)
  coef_sq <- 0
  
  # quadratic term included
  if (var2 %in% out_ind_sq$var){
    pip_sq <- out_ind_sq$PIP[out_ind_sq$var == var2]
    m_sd_sq <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var2]
    
    # covariate not robust
    if (
      (pip_l <= 0.5 | m_sd_l <= msd_cutoff)
    ) {
      return()
    }
    # quadratic term robust if
    if (
      (pip_sq > 0.5 & m_sd_sq > msd_cutoff)
    ) {
      coef_sq <- out_ind_sq$`Post. mean`[out_ind_sq$var == var2]
    }
  }  else {
    # only linear term included
    # linear term not robust
    if (pip_l <= 0.5 | m_sd_l <= msd_cutoff) {
      return()
    }
  }
  
  sq_function <- function(x){
    coef_l*x + coef_sq*x^2
  }
  return(
    data.frame(x = seq(-2, 2, 0.05)) %>% 
      ggplot(aes(x)) +
      geom_hline(yintercept = 0, color = "grey70") +
      geom_vline(xintercept = 0, color = "grey70") +
      stat_function(fun = sq_function) + 
      scale_y_continuous(limits = c(-10, 10), labels = label_percent(scale = 1)) +
      labs(x = names(plot_vars_pop)[i], y = "p")#ifelse(i == 1, "p", "")
  )
})

plot_list_pop2 <- plot_list_pop[!(sapply(plot_list_pop, is.null))]
sq_plot_pop <- plot_grid(plotlist = plot_list_pop2, labels = "auto", ncol = 4)
sq_plot_pop

#### environmental variables ####
plot_vars_env <- ind_names$var[ind_names$Category == "Environment"] 
names(plot_vars_env) <- ind_names$Variable[ind_names$Category == "Environment"]

plot_list_env <- lapply(seq_along(plot_vars_env), function(i) {
  var <- plot_vars_env[i]
  coef_l <- out_ind_sq$`Post. mean`[out_ind_sq$var == var]
  pip_l <- out_ind_sq$PIP[out_ind_sq$var == var]
  m_sd_l <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var]
  var2 <- paste0(var, "#", var)
  coef_sq <- 0
  
  # quadratic term included
  if (var2 %in% out_ind_sq$var){
    pip_sq <- out_ind_sq$PIP[out_ind_sq$var == var2]
    m_sd_sq <- out_ind_sq$`Mean/SD`[out_ind_sq$var == var2]
    
    # covariate not robust
    if (
      (pip_l <= 0.5 | m_sd_l <= msd_cutoff) 
    ) {
      return()
    }
    # quadratic term robust if
    if (
      (pip_sq > 0.5 & m_sd_sq > msd_cutoff)
    ) {
      coef_sq <- out_ind_sq$`Post. mean`[out_ind_sq$var == var2]
    }
  }  else {
    # only linear term included
    # linear term not robust
    if (pip_l <= 0.5 | m_sd_l <= msd_cutoff) {
      return()
    }
  }
  
  sq_function <- function(x){
    coef_l*x + coef_sq*x^2
  }
  return(
    data.frame(x = seq(-2, 2, 0.05)) %>% 
      ggplot(aes(x)) +
      geom_hline(yintercept = 0, color = "grey70") +
      geom_vline(xintercept = 0, color = "grey70") +
      stat_function(fun = sq_function) + 
      scale_y_continuous(limits = c(-10, 10), labels = label_percent(scale = 1)) +
      labs(x = names(plot_vars_env)[i], y = "p")#ifelse(i == 1, "p", "")
  )
})

plot_list_env2 <- plot_list_env[!(sapply(plot_list_env, is.null))]
sq_plot_env <- plot_grid(plotlist = plot_list_env2, labels = "auto", ncol = 4)
sq_plot_env
